# -*- coding: utf-8 -*-
from datetime import datetime

import re
import scrapy
import json


class TestSpider(scrapy.Spider):
    """Scratch spider that prints the span text of articles from one API page.

    A single JSON page of the cmiinv.com CMS API is requested (see
    ``start_urls``); no further pages are followed and no items are yielded.
    """

    # Spider name (unique).
    name = "cmiinv"
    # Domains the spider is allowed to crawl.
    allowed_domains = ["cmiinv.com"]
    # Initial request. The percent-encoded path segment is the category name
    # (UTF-8 for "媒体报道"); the query string asks for page 1 with 5 entries.
    start_urls = [
        "https://www.cmiinv.com/api/v2/cms/category/COVERAGE/name/%E5%AA%92%E4%BD%93%E6%8A%A5%E9%81%93?pageSize=5&page=1"
    ]

    # Captures the text between ``<span ...>`` and the next ``</span>``.
    # Compiled once here instead of once per article. The pattern requires a
    # space after ``<span`` (so a bare ``<span>`` is not matched) and, without
    # DOTALL, does not match across newlines.
    _SPAN_TEXT_RE = re.compile(r'<span .*?>(.*?)</span>')

    def parse(self, response):
        """Print the span text fragments of every article in the response.

        Only the first page is processed; no pagination requests are issued.

        NOTE(review): the original comment on this method described an
        incremental crawl limited to news from the last day, and the code
        derived each article's age from ``pubDate`` without ever using it.
        No age filter is applied here, so every returned article is printed
        exactly as before. Confirm whether a one-day cut-off is wanted.

        Args:
            response: Response for the ``start_urls`` request. Its body must
                be a JSON object with a ``"results"`` list whose entries each
                carry a ``"content"`` string.

        Returns:
            None. Output goes to stdout, one list of fragments per article.

        Raises:
            ValueError: If the body is not valid JSON.
            KeyError: If ``"results"`` or an entry's ``"content"`` is missing.
        """
        payload = json.loads(response.body)
        for article in payload["results"]:
            article_content = self._SPAN_TEXT_RE.findall(article['content'])
            # Parenthesised single-argument form works on Python 2 and 3.
            print(article_content)

